library(forecast)
library(dplyr)
library(tidyr)
library(stringr)
library(lubridate)
library(ggplot2)
theme_set(theme_bw())
library(weatherGen)
data(climate)
# set RNG seed for reproducibility
set.seed(2345)
This analysis will use the gridded climate dataset by Maurer et al, 2002. The data have already been downloaded and converted to an SQLite database using the makefile in walkerjeffd/climate-data/maurer. After following the instructions in this repo, there should be an SQLite database called maurer/db/maurer_mon.db, which contains a grid table containing the grid points, and a data table containing the monthly timeseries for each grid point. See the vignette for examples on how to use this database.
DB_PATH <- '/Users/jeff/Projects/UMass/climate-data/maurer/db/maurer_mon.db'
db <- src_sqlite(DB_PATH, create = FALSE)
df.grid <- tbl(db, "grid") %>%
collect
Here is a map of the climate data grid points.
library(ggmap)
map <- get_map(location=c(lon=mean(range(df.grid$LON)), lat=mean(range(df.grid$LAT))),
zoom=4, maptype="satellite", color='bw')
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=36.625,-78.8125&zoom=4&size=%20640x640&scale=%202&maptype=satellite&sensor=false
## Google Maps API Terms of Service : http://developers.google.com/maps/terms
ggmap(map, darken=c(0.25, "white"), extent="device") +
geom_point(aes(x=LON, y=LAT), data=df.grid, color='red', size=1)
Zoomed into new england. The green points will be used to compute a regional average.
map <- get_map(location=c(lon=-72, lat=42),
zoom=7, maptype="satellite", color='bw')
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=42,-72&zoom=7&size=%20640x640&scale=%202&maptype=satellite&sensor=false
## Google Maps API Terms of Service : http://developers.google.com/maps/terms
ggmap(map, darken=c(0.25, "white"), extent="device") +
geom_point(aes(x=LON, y=LAT, color=SELECT),
data=mutate(df.grid, SELECT=(LAT>=41 & LAT<=43 & LON>=-74 & LON<=-70)), size=2) +
geom_vline(aes(xintercept=LON), color='green', linetype=2,
data=data.frame(LON=seq(-74, -70))) +
geom_hline(aes(yintercept=LAT), color='green', linetype=2,
data=data.frame(LAT=seq(41, 43))) +
scale_color_manual(values=c("TRUE"="green", "FALSE"="red"), guide=FALSE)
Using the selected grid points, retrieve the monthly timeseries from the database. Also, compute the variable TAVG as the arithmetic mean of TMIN and TMAX.
clim.mon <- tbl(db, "data") %>%
filter(LAT>=41, LAT<=43, LON>=-74, LON<=-70) %>%
collect %>%
select(-REGION, -WIND) %>%
mutate(TAVG=(TMIN+TMAX)/2) %>%
gather(VAR, VALUE, PRCP, TMIN, TMAX, TAVG)
summary(clim.mon)
## YEAR MONTH LAT LON
## Min. :1949 Min. : 1.00 Min. :41.1 Min. :-73.9
## 1st Qu.:1964 1st Qu.: 3.75 1st Qu.:41.7 1st Qu.:-73.3
## Median :1980 Median : 6.50 Median :42.1 Median :-72.4
## Mean :1980 Mean : 6.50 Mean :42.1 Mean :-72.4
## 3rd Qu.:1995 3rd Qu.: 9.25 3rd Qu.:42.6 3rd Qu.:-71.7
## Max. :2010 Max. :12.00 Max. :42.9 Max. :-70.1
## VAR VALUE
## PRCP:267840 Min. :-22.1
## TMIN:267840 1st Qu.: 3.2
## TMAX:267840 Median : 14.2
## TAVG:267840 Mean : 31.3
## 3rd Qu.: 28.3
## Max. :705.8
Compute annual timeseries of each climate variable and each location. Note that precipitation is simply the annual sum, while the other three variables are computed as weighted means based on the number of days in each month to get a correct annual mean.
clim.yr <- spread(clim.mon, VAR, VALUE) %>%
mutate(MONTHYEAR=ymd(paste(YEAR,MONTH,1,sep='-')),
N_DAY=days_in_month(MONTHYEAR)) %>%
group_by(LAT, LON, YEAR) %>%
summarise(PRCP=sum(PRCP),
TMAX=sum(TMAX*N_DAY)/sum(N_DAY),
TMIN=sum(TMIN*N_DAY)/sum(N_DAY),
TAVG=sum(TAVG*N_DAY)/sum(N_DAY)) %>%
gather(VAR, VALUE, PRCP:TAVG)
summary(clim.yr)
## LAT LON YEAR VAR
## Min. :41.1 Min. :-73.9 Min. :1949 PRCP:22320
## 1st Qu.:41.7 1st Qu.:-73.3 1st Qu.:1964 TMAX:22320
## Median :42.1 Median :-72.4 Median :1980 TMIN:22320
## Mean :42.1 Mean :-72.4 Mean :1980 TAVG:22320
## 3rd Qu.:42.6 3rd Qu.:-71.7 3rd Qu.:1995
## Max. :42.9 Max. :-70.1 Max. :2010
## VALUE
## Min. : -3.5
## 1st Qu.: 6.1
## Median : 11.6
## Mean : 302.8
## 3rd Qu.: 139.5
## Max. :1978.9
Compute the regional average monthly value of each climate variable.
clim.reg.mon <- clim.mon %>%
group_by(YEAR, MONTH, VAR) %>%
summarise(MEAN=mean(VALUE),
SD=sd(VALUE))
summary(clim.reg.mon)
## YEAR MONTH VAR MEAN
## Min. :1949 Min. : 1.00 PRCP:744 Min. :-14.9
## 1st Qu.:1964 1st Qu.: 3.75 TMIN:744 1st Qu.: 3.0
## Median :1980 Median : 6.50 TMAX:744 Median : 14.4
## Mean :1980 Mean : 6.50 TAVG:744 Mean : 31.3
## 3rd Qu.:1995 3rd Qu.: 9.25 3rd Qu.: 28.6
## Max. :2010 Max. :12.00 Max. :373.9
## SD
## Min. : 0.81
## 1st Qu.: 1.29
## Median : 1.64
## Mean : 7.10
## 3rd Qu.: 3.21
## Max. :110.64
This figure shows the regional average monthly climate timeseries. The shaded region shows mean +/- 1 standard deviation.
mutate(clim.reg.mon, DATE=ymd(paste(YEAR,MONTH,1,sep='-'))) %>%
ggplot(aes(DATE, MEAN)) +
geom_ribbon(aes(ymin=MEAN-SD, ymax=MEAN+SD), fill='grey50') +
geom_line(color='blue') +
facet_wrap(~VAR, ncol=1, scales='free_y') +
labs(x="Month/Year", y="Mean +/- StDev")
Compute the regional average annual value of each climate variable.
clim.reg.yr <- clim.yr %>%
group_by(VAR, YEAR) %>%
summarise(MEAN=mean(VALUE),
SD=sd(VALUE))
summary(clim.reg.yr)
## VAR YEAR MEAN SD
## PRCP:62 Min. :1949 Min. : 1.8 Min. : 1.01
## TMAX:62 1st Qu.:1964 1st Qu.: 7.1 1st Qu.: 1.25
## TMIN:62 Median :1980 Median : 11.9 Median : 1.53
## TAVG:62 Mean :1980 Mean : 302.8 Mean : 30.52
## 3rd Qu.:1995 3rd Qu.: 203.6 3rd Qu.: 19.48
## Max. :2010 Max. :1520.8 Max. :208.59
This figure shows the regional-average annual climate timeseries. The shaded region shows the mean +/- 1 standard deviation.
ggplot(clim.reg.yr, aes(YEAR, MEAN)) +
geom_ribbon(aes(ymin=MEAN-SD, ymax=MEAN+SD), fill='grey80') +
geom_line(color='blue') +
facet_wrap(~VAR, ncol=1, scales='free_y') +
labs(x="Year", y="Mean +/- StDev")
Convert the regional average datasets wide format for use in the weather generator.
clim.reg.mon <- select(clim.reg.mon, -SD) %>%
spread(VAR, MEAN)
head(clim.reg.mon)
## Source: local data frame [6 x 6]
##
## YEAR MONTH PRCP TMIN TMAX TAVG
## 1 1949 1 117.52 -4.818 4.243 -0.2873
## 2 1949 2 77.78 -6.123 4.882 -0.6205
## 3 1949 3 48.22 -3.279 8.025 2.3733
## 4 1949 4 98.86 2.510 15.229 8.8692
## 5 1949 5 108.86 7.175 21.098 14.1365
## 6 1949 6 23.19 13.578 27.450 20.5140
clim.reg.yr <- select(clim.reg.yr, -SD) %>%
spread(VAR, MEAN)
head(clim.reg.yr)
## Source: local data frame [6 x 5]
##
## YEAR PRCP TMAX TMIN TAVG
## 1 1949 912.9 16.22 4.169 10.194
## 2 1950 1075.9 14.50 2.944 8.724
## 3 1951 1281.2 14.94 3.561 9.249
## 4 1952 1168.9 15.13 3.754 9.443
## 5 1953 1329.0 16.07 4.205 10.139
## 6 1954 1291.9 14.66 3.370 9.014
Perform wavelet analysis on regional average annual precipitation timeseries.
wt.reg <- wavelet_analysis(clim.reg.yr$PRCP, sig.level=0.90, noise.type='white')
par(mfrow=c(1,2))
plot(wt.reg, plot.cb=TRUE, plot.phase=FALSE)
plot(wt.reg$gws, wt.reg$period, type="b",
xlab="Global Wavelet Spectrum", ylab="",
log="y", ylim=rev(range(wt.reg$period)),
xlim=range(c(wt.reg$gws, wt.reg$gws.sig$signif)))
lines(wt.reg$gws.sig$signif, wt.reg$period, lty=2, col='red')
Although there are some significant wavelet periods, we’ll use a simple arima model of the regional annual precipitation instead of arima models of the wavelet components.
models <- list(PRCP=auto.arima(clim.reg.yr$PRCP,
max.p=2,max.q=2,max.P=0,max.Q=0,
stationary=TRUE))
models[["PRCP"]]
## Series: clim.reg.yr$PRCP
## ARIMA(1,0,0) with non-zero mean
##
## Coefficients:
## ar1 intercept
## 0.218 1183.87
## s.e. 0.126 26.69
##
## sigma^2 estimated as 27243: log likelihood=-404.6
## AIC=815.2 AICc=815.6 BIC=821.6
This figure shows a 20-year forecast of the arima model reflecting no low-frequency oscillations (since we are not using the wavelet decomposition).
par(mfrow=c(1,1))
plot(forecast(models[['PRCP']], h=20), main="ARIMA Forecast of Regional Annual Precip",
xlab="Timestep (yr)", ylab='Annual Precip (mm/yr)')
Generate 50 simulations of annual precipitation using the AR1 model of the regional annual precipitation.
sim.prcp <- weatherGen::mc_arimas(models=models, n=nrow(clim.reg.yr), n.iter=50)
str(sim.prcp)
## List of 4
## $ x : num [1:62, 1:50] 1063 1355 1381 1024 1460 ...
## $ gws : num [1:21, 1:50] 9569 14863 21023 27508 23138 ...
## $ x.stat : num [1:62, 1:3] 1168 1178 1188 1182 1185 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : NULL
## .. ..$ : chr [1:3] "MEAN" "Q025" "Q975"
## $ gws.stat: num [1:21, 1:3] 12705 20034 21128 22864 24353 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : NULL
## .. ..$ : chr [1:3] "MEAN" "Q025" "Q975"
Compare the GWS power and the mean, standard deviation, and skewness statistics across these simulations (red point is the overall mean, boxplots are the distribution across simulations) to regional annual precipitation timeseries (blue point).
par(mfrow=c(2,2))
x1 <- 1
x2 <- min(length(wt.reg$period), nrow(sim.prcp$gws))
ymin <- min(wt.reg$period[x1:x2], wt.reg$gws[x1:x2],
sim.prcp$gws.stat[x1:x2, 'MEAN'],
sim.prcp$gws.stat[x1:x2, 'Q025'])
ymax <- max(wt.reg$period[x1:x2], wt.reg$gws[x1:x2],
sim.prcp$gws.stat[x1:x2, 'MEAN'],
sim.prcp$gws.stat[x1:x2, 'Q975'])
plot(wt.reg$period[x1:x2], wt.reg$gws[x1:x2],
type="n", ylim=c(ymin,ymax), xlab="Period (years)", ylab="",
main="", log="y")
mtext(side=2, expression(paste("Power (",mm^2,")")), line=2.5)
polygon(c(wt.reg$period[x1:x2],
rev(wt.reg$period[x1:x2])),
c(sim.prcp$gws.stat[x1:x2, 'Q025'],
rev(sim.prcp$gws.stat[x1:x2, 'Q975'])),
col="grey")
lines(wt.reg$period[x1:x2], wt.reg$gws[x1:x2])
lines(wt.reg$period[x1:x2], sim.prcp$gws.stat[x1:x2, 'MEAN'], lty=2, col="blue")
lines(wt.reg$period[x1:x2], wt.reg$gws.sig$signif[x1:x2], col="red", lty=3)
boxplot(colMeans(sim.prcp$x), main="Mean")
points(mean(clim.reg.yr$PRCP),col="red",pch=19)
points(mean(colMeans(sim.prcp$x)), col="blue", pch=19)
boxplot(apply(sim.prcp$x, 2, sd), main="Standard Deviation")
points(sd(clim.reg.yr$PRCP), col="red", pch=19)
points(mean(apply(sim.prcp$x, 2, sd)), col="blue", pch=19)
boxplot(apply(sim.prcp$x, 2, skewness), main="Skew")
points(skewness(clim.reg.yr$PRCP), col="red", pch=19)
points(mean(apply(sim.prcp$x, 2, skewness)),col="blue",pch=19)
This figure shows the simulated and observed regional-average annual precipitation. The simulations were generated using the AR1 model described above.
as.data.frame(sim.prcp$x) %>%
mutate(TIMESTEP=row_number()) %>%
gather(TRIAL, VALUE, -TIMESTEP) %>%
ggplot(aes(TIMESTEP, VALUE)) +
geom_line(aes(group=TRIAL), alpha=0.3) +
stat_summary(fun.y=mean, geom='line', color='red', size=1) +
geom_line(aes(x=TIMESTEP, y=PRCP), data=mutate(clim.reg.yr, TIMESTEP=row_number()), color='blue', size=1) +
labs(x="Year", y="Annual Precipitation (mm/yr)",
title="WARM Annual Precipitation Simulations\nRed Line=Mean of Simulations, Blue Line=Observd Regional Mean")
First, select a random location from the complete climate dataset.
clim.locs <- select(clim.yr, LAT, LON) %>%
unique()
loc <- clim.locs[sample(nrow(clim.locs), size=1),] %>% as.numeric
names(loc) <- c('LAT', 'LON')
loc
## LAT LON
## 42.19 -72.56
Now extract the monthly and annual climate datasets for this location.
clim.loc.yr <- filter(clim.yr, LAT==loc[['LAT']], LON==loc[['LON']]) %>%
spread(VAR, VALUE) %>%
arrange(YEAR)
clim.loc.mon <- filter(clim.mon, LAT==loc[['LAT']], LON==loc[['LON']]) %>%
spread(VAR, VALUE) %>%
arrange(YEAR, MONTH)
This figure shows the original climate timeseries for the regional-average dataset and the selected location.
clim.loc.mon %>%
select(-LAT, -LON) %>%
mutate(SOURCE="Location") %>%
rbind(clim.reg.mon %>% mutate(SOURCE="Region")) %>%
mutate(SOURCE=factor(SOURCE)) %>%
gather(VAR, VALUE, PRCP:TAVG) %>%
mutate(DATE=ymd(paste(YEAR, MONTH, 1, sep='-'))) %>%
ggplot(aes(DATE, VALUE, color=SOURCE)) +
geom_line() +
labs(x="Month/Year", y="") +
scale_color_manual('', values=c('steelblue', 'orangered')) +
facet_wrap(~VAR, ncol=1, scales='free_y')
Compute the euclidian distance between a single simulated annual precipitation value and each observed year. The simulated annual precipitation is for the first year in the first simulation trial.
j <- 1 # year number
samp <- 1 # simulation trial number
# compute distances between simulated WARM timeseries and observed regional precip
distances <- cbind(clim.reg.yr, DISTANCE=sqrt((sim.prcp$x[j,samp] - clim.reg.yr$PRCP)^2))
ggplot(distances, aes(YEAR, PRCP)) +
geom_line() +
geom_point(aes(color=DISTANCE), size=3) +
geom_hline(yint=sim.prcp$x[j,samp], linetype=2, color='red') +
geom_text(x=max(distances$YEAR), y=sim.prcp$x[j,samp], label="Sim", hjust=1, vjust=-0.5) +
scale_color_gradient(low='green', high='red') +
labs(x="Year", y="Annual Precipitation (mm/yr)",
title="Regional and Current Simulation Annual Precipitation")
Select the eight years from the observed regional annual simulation that are the most similar to the first simulation trial and year.
distances <- mutate(distances,
IN_TOP_8=rank(DISTANCE) %in% seq(1,8))
distances %>%
ggplot(aes(YEAR, PRCP)) +
geom_line() +
geom_point(aes(color=DISTANCE), size=3) +
geom_hline(yint=sim.prcp$x[j,samp], linetype=2, color='red') +
geom_point(mapping=aes(alpha=IN_TOP_8), color='black', shape=1, size=4) +
geom_text(aes(x=YEAR, y=PRCP, label=YEAR), data=filter(distances, IN_TOP_8), vjust=-1) +
geom_text(x=max(distances$YEAR), y=sim.prcp$x[j,samp], label="Sim", hjust=1, vjust=-0.5) +
scale_alpha_manual(values=c("TRUE"=1, "FALSE"=0), guide=FALSE) +
scale_color_gradient(low='green', high='red') +
labs(x="Year", y="Annual Precipitation (mm)", title="Regional Annual Precipitation with 8 Nearest Neighbors Selected")
Extract the observed regional precipitation for the candidate years and compute the sampling weights as inverse distance-squared weighted.
distances.top <- filter(distances, IN_TOP_8) %>%
mutate(WEIGHT=(1/DISTANCE)^2,
WEIGHT=WEIGHT/sum(WEIGHT))
distances.top %>%
ggplot(aes(PRCP, WEIGHT)) +
geom_point() +
geom_vline(xint=sim.prcp$x[j,samp], linetype=2, color='red') +
geom_text(aes(label=YEAR), hjust=-0.1) +
geom_text(x=sim.prcp$x[j,samp], y=0.4, label="Sim Precip", hjust=-0.1) +
labs(x="Annual Precipitation (mm)", y="Sample Weight")
Now we can randomly sample from the 8 nearest neighbors using the sampling weights based on the inverse distance squared. We’ll do this 1000 times and compare the frequency of the samples to the sample weights.
sampled.years <- sample(distances.top$YEAR,
size=1000,
prob=distances.top$WEIGHT,
replace=TRUE)
sampled.years.tbl <- prop.table(table(sampled.years))
merge(distances.top,
data.frame(YEAR=names(sampled.years.tbl),
FREQ=as.vector(sampled.years.tbl)),
all.x=TRUE) %>%
select(YEAR, WEIGHT, FREQ) %>%
ggplot(aes(WEIGHT, FREQ)) +
geom_point(size=3) +
geom_abline(linetype=2) +
labs(x="Sample Weight", y="Sampled Frequency")
Now if we just sample for one year.
sampled.year <- sample(distances.top$YEAR, size=1, prob=distances.top$WEIGHT, replace=TRUE)
sampled.year
## [1] 1988
Then we can extract the observed monthly climate data for the sampled year from the observed dataset of the selected location.
filter(clim.loc.mon, YEAR==sampled.year)
## Source: local data frame [12 x 8]
##
## YEAR MONTH LAT LON PRCP TMIN TMAX TAVG
## 1 1988 1 42.19 -72.56 70.10 -12.27 0.82 -5.725
## 2 1988 2 42.19 -72.56 114.40 -9.54 3.15 -3.195
## 3 1988 3 42.19 -72.56 65.47 -4.25 8.95 2.350
## 4 1988 4 42.19 -72.56 86.18 2.33 13.58 7.955
## 5 1988 5 42.19 -72.56 95.32 8.04 21.73 14.885
## 6 1988 6 42.19 -72.56 54.65 10.93 26.58 18.755
## 7 1988 7 42.19 -72.56 174.73 16.68 30.01 23.345
## 8 1988 8 42.19 -72.56 115.75 15.87 29.60 22.735
## 9 1988 9 42.19 -72.56 39.65 8.05 23.39 15.720
## 10 1988 10 42.19 -72.56 62.00 1.47 14.67 8.070
## 11 1988 11 42.19 -72.56 159.30 -1.09 11.33 5.120
## 12 1988 12 42.19 -72.56 24.10 -8.07 3.63 -2.220
Given the selected location, repeat the nearest neighbor process for each of the num_sim simulations and num_year years, and store the result in a 3-dimensional array.
num_sim <- ncol(sim.prcp$x)
num_year <- nrow(sim.prcp$x) # number of composited years
num_months <- num_year*12
climate_variables <- select(clim.reg.yr, -YEAR) %>%
names()
sampled.year <- array(NA, c(num_year, num_sim))
gen.loc.mon.arr <- array(NA, c(num_months, length(climate_variables), num_sim))
# loop through simulations (trials)
for (samp in 1:num_sim) {
# loop through years
for (j in 1:num_year) {
# compute distances between simulated WARM timeseries and observed regional precip
distances <- mutate(clim.reg.yr,
DISTANCE=sqrt((sim.prcp$x[j, samp] - clim.reg.yr$PRCP)^2),
IN_TOP_8=rank(DISTANCE) %in% seq(1,8))
distances.top <- filter(distances, IN_TOP_8) %>%
mutate(WEIGHT=(1/DISTANCE)^2,
WEIGHT=WEIGHT/sum(WEIGHT))
sampled.year[j,samp] <- sample(distances.top$YEAR,
size=1,
prob=distances.top$WEIGHT,
replace=FALSE)
row.idx <- ((1+12*(j-1)):(12+12*(j-1)))
gen.loc.mon.arr[row.idx, , samp] <- data.matrix(filter(clim.loc.mon,
YEAR==sampled.year[j, samp]) %>%
select(PRCP, TMAX, TMIN, TAVG))
}
}
Now convert the 3-dim array to a list of dataframes with each element in the list containing the simulated data for a single variable.
gen.loc.mon <- sapply(climate_variables, function(v) {
i.var <- which(climate_variables==v)
df <- as.data.frame(gen.loc.mon.arr[,i.var,]) %>%
mutate(TIMESTEP=row_number(),
SIM_YEAR=rep(seq(1, num_year), each=12),
SIM_MONTH=rep(seq(1, 12), times=num_year),
VAR=v) %>%
gather(TRIAL, VALUE, -TIMESTEP, -SIM_YEAR, -SIM_MONTH, -VAR) %>%
mutate(TRIAL=as.numeric(TRIAL))
return(list(df))
})
str(gen.loc.mon)
## List of 4
## $ PRCP:'data.frame': 37200 obs. of 6 variables:
## ..$ TIMESTEP : int [1:37200] 1 2 3 4 5 6 7 8 9 10 ...
## ..$ SIM_YEAR : int [1:37200] 1 1 1 1 1 1 1 1 1 1 ...
## ..$ SIM_MONTH: int [1:37200] 1 2 3 4 5 6 7 8 9 10 ...
## ..$ VAR : chr [1:37200] "PRCP" "PRCP" "PRCP" "PRCP" ...
## ..$ TRIAL : num [1:37200] 1 1 1 1 1 1 1 1 1 1 ...
## ..$ VALUE : num [1:37200] 70.1 114.4 65.5 86.2 95.3 ...
## $ TMAX:'data.frame': 37200 obs. of 6 variables:
## ..$ TIMESTEP : int [1:37200] 1 2 3 4 5 6 7 8 9 10 ...
## ..$ SIM_YEAR : int [1:37200] 1 1 1 1 1 1 1 1 1 1 ...
## ..$ SIM_MONTH: int [1:37200] 1 2 3 4 5 6 7 8 9 10 ...
## ..$ VAR : chr [1:37200] "TMAX" "TMAX" "TMAX" "TMAX" ...
## ..$ TRIAL : num [1:37200] 1 1 1 1 1 1 1 1 1 1 ...
## ..$ VALUE : num [1:37200] 0.82 3.15 8.95 13.58 21.73 ...
## $ TMIN:'data.frame': 37200 obs. of 6 variables:
## ..$ TIMESTEP : int [1:37200] 1 2 3 4 5 6 7 8 9 10 ...
## ..$ SIM_YEAR : int [1:37200] 1 1 1 1 1 1 1 1 1 1 ...
## ..$ SIM_MONTH: int [1:37200] 1 2 3 4 5 6 7 8 9 10 ...
## ..$ VAR : chr [1:37200] "TMIN" "TMIN" "TMIN" "TMIN" ...
## ..$ TRIAL : num [1:37200] 1 1 1 1 1 1 1 1 1 1 ...
## ..$ VALUE : num [1:37200] -12.27 -9.54 -4.25 2.33 8.04 ...
## $ TAVG:'data.frame': 37200 obs. of 6 variables:
## ..$ TIMESTEP : int [1:37200] 1 2 3 4 5 6 7 8 9 10 ...
## ..$ SIM_YEAR : int [1:37200] 1 1 1 1 1 1 1 1 1 1 ...
## ..$ SIM_MONTH: int [1:37200] 1 2 3 4 5 6 7 8 9 10 ...
## ..$ VAR : chr [1:37200] "TAVG" "TAVG" "TAVG" "TAVG" ...
## ..$ TRIAL : num [1:37200] 1 1 1 1 1 1 1 1 1 1 ...
## ..$ VALUE : num [1:37200] -5.72 -3.19 2.35 7.96 14.88 ...
Aggregate the generated timeseries to compute annual values or each variable.
gen.loc.yr <- lapply(gen.loc.mon, function(df) {
if ('PRCP' %in% unique(df$VAR)) {
df.yr <- group_by(df, VAR, TRIAL, SIM_YEAR) %>%
summarise(VALUE=sum(VALUE))
} else {
df.yr <- group_by(df, VAR, TRIAL, SIM_YEAR) %>%
summarise(VALUE=mean(VALUE))
}
df.yr <- df.yr %>%
ungroup() %>%
as.data.frame()
return(df.yr)
})
str(gen.loc.yr)
## List of 4
## $ PRCP:'data.frame': 3100 obs. of 4 variables:
## ..$ VAR : chr [1:3100] "PRCP" "PRCP" "PRCP" "PRCP" ...
## ..$ TRIAL : num [1:3100] 1 1 1 1 1 1 1 1 1 1 ...
## ..$ SIM_YEAR: int [1:3100] 1 2 3 4 5 6 7 8 9 10 ...
## ..$ VALUE : num [1:3100] 1062 1274 1458 958 1245 ...
## $ TMAX:'data.frame': 3100 obs. of 4 variables:
## ..$ VAR : chr [1:3100] "TMAX" "TMAX" "TMAX" "TMAX" ...
## ..$ TRIAL : num [1:3100] 1 1 1 1 1 1 1 1 1 1 ...
## ..$ SIM_YEAR: int [1:3100] 1 2 3 4 5 6 7 8 9 10 ...
## ..$ VALUE : num [1:3100] 15.6 16.9 15 15.5 15.7 ...
## $ TMIN:'data.frame': 3100 obs. of 4 variables:
## ..$ VAR : chr [1:3100] "TMIN" "TMIN" "TMIN" "TMIN" ...
## ..$ TRIAL : num [1:3100] 1 1 1 1 1 1 1 1 1 1 ...
## ..$ SIM_YEAR: int [1:3100] 1 2 3 4 5 6 7 8 9 10 ...
## ..$ VALUE : num [1:3100] 2.35 4.49 3.37 2.78 3.55 ...
## $ TAVG:'data.frame': 3100 obs. of 4 variables:
## ..$ VAR : chr [1:3100] "TAVG" "TAVG" "TAVG" "TAVG" ...
## ..$ TRIAL : num [1:3100] 1 1 1 1 1 1 1 1 1 1 ...
## ..$ SIM_YEAR: int [1:3100] 1 2 3 4 5 6 7 8 9 10 ...
## ..$ VALUE : num [1:3100] 8.98 10.67 9.16 9.13 9.62 ...
This figure shows all the weather generator simulations compared to the observed climate dataset at the selected location.
do.call(rbind, gen.loc.yr) %>%
mutate(VAR=factor(VAR)) %>%
ggplot() +
geom_line(aes(SIM_YEAR, VALUE, group=TRIAL), alpha=0.5) +
geom_line(aes(SIM_YEAR, VALUE),
data=mutate(clim.reg.yr, SIM_YEAR=row_number()) %>%
gather(VAR, VALUE, PRCP:TAVG),
color='red') +
facet_wrap(~VAR, scales='free_y') +
labs(x="Simulation Year", y="Value", title="All Generated Datasets with Observed Location Dataset ")
This figure shows only the first weather generator simulation compared to the observed climate dataset at the selected location.
do.call(rbind, gen.loc.yr) %>%
mutate(VAR=factor(VAR)) %>%
filter(TRIAL==1) %>%
ggplot() +
geom_line(aes(SIM_YEAR, VALUE, group=TRIAL), alpha=0.5) +
geom_line(aes(SIM_YEAR, VALUE),
data=mutate(clim.reg.yr, SIM_YEAR=row_number()) %>%
gather(VAR, VALUE, PRCP:TAVG),
color='red') +
facet_wrap(~VAR, scales='free_y') +
labs(x="Simulation Year", y="Value", title="Single Generated Dataset with Observed Location Dataset ")
The following three figures compare the mean, standard deviation, and skewness of the annual precipitation timeseries between the weather generator simulations and the observed climate dataset at the selected location. The boxplots show the distribution of each statistic across the simulations, the red point shows the mean of the statistic for the simulations, and the blue point shows the statistic value for the observed annual timeseries.
do.call(rbind, gen.loc.yr) %>%
mutate(VAR=factor(VAR)) %>%
group_by(VAR, TRIAL) %>%
summarise(MEAN=mean(VALUE)) %>%
ggplot(aes(x=1, y=MEAN)) +
geom_boxplot() +
stat_summary(fun.y=mean, geom="point", color="red", size=3) +
geom_point(aes(x=1, y=MEAN),
data=gather(clim.loc.yr, VAR, VALUE, PRCP:TAVG) %>%
group_by(VAR) %>%
summarise(MEAN=mean(VALUE)),
color='blue', size=3) +
facet_wrap(~VAR, scales='free_y') +
labs(x="", y="Mean", title="Mean of Annual Precipitation for Generated (Red) and Observed (Blue)") +
theme(axis.text.x=element_blank(), axis.ticks.x=element_blank())
do.call(rbind, gen.loc.yr) %>%
mutate(VAR=factor(VAR)) %>%
group_by(VAR, TRIAL) %>%
summarise(SD=sd(VALUE)) %>%
ggplot(aes(x=1, y=SD)) +
geom_boxplot() +
stat_summary(fun.y=mean, geom="point", color="red", size=3) +
geom_point(aes(x=1, y=SD),
data=gather(clim.loc.yr, VAR, VALUE, PRCP:TAVG) %>%
group_by(VAR) %>%
summarise(SD=sd(VALUE)),
color='blue', size=3) +
facet_wrap(~VAR, scales='free_y') +
labs(x="", y="Standard Deviation", title="StDev of Annual Precipitation for Generated (Red) and Observed (Blue)") +
theme(axis.text.x=element_blank(), axis.ticks.x=element_blank())
do.call(rbind, gen.loc.yr) %>%
mutate(VAR=factor(VAR)) %>%
group_by(VAR, TRIAL) %>%
summarise(SKEW=skewness(VALUE)) %>%
ggplot(aes(x=1, y=SKEW)) +
geom_boxplot() +
stat_summary(fun.y=mean, geom="point", color="red", size=3) +
geom_point(aes(x=1, y=SKEW),
data=gather(clim.loc.yr, VAR, VALUE, PRCP:TAVG) %>%
group_by(VAR) %>%
summarise(SKEW=skewness(VALUE)),
color='blue', size=3) +
facet_wrap(~VAR, scales='free_y') +
labs(x="", y="Skewness", title="Skewness of Annual Precipitation for Generated (Red) and Observed (Blue)") +
theme(axis.text.x=element_blank(), axis.ticks.x=element_blank())
The weatherGen::gen_mon_arma() function provides a wrapper around these steps to generate multiple monthly timeseries based on the regional annual precipitation timeseries and the location monthly climate timeseries. This function returns a list object containing the following elements:
$models: list of arma models (only 1 if using ARMA model instead of WARM model)$sim.prcp.yr: output from weatherGen::mc_arimas() containing annual precipitation simulations from the ARMA model$sim.mon: unnamed list of length n.iter with each element containing a data.frame with the generated monthly timeseries for the given location. The columns YEAR_OBS and PRCP_OBS_YEAR contain the year and corresponding annual precipitation sampled from the monthly timeseries of the location.wgen_mon <- weatherGen::gen_month_arma(x.reg.yr=clim.reg.yr, x.loc.mon=clim.loc.mon,
n.iter=50, n.year=nrow(clim.reg.yr))
str(wgen_mon, max.level=1)
## List of 3
## $ models :List of 1
## $ sim.prcp.yr:List of 4
## $ sim.mon :List of 50
Here is an example of the simulated monthly timeseries for the first trial.
head(wgen_mon$sim.mon[[1]])
## YEAR_SIM MONTH PRCP TMAX TMIN TAVG YEAR_OBS PRCP_OBS_YEAR
## 1 1 1 69.20 3.71 -5.72 -1.005 2007 1180
## 2 1 2 51.75 -0.58 -10.56 -5.570 2007 1180
## 3 1 3 125.60 7.19 -5.51 0.840 2007 1180
## 4 1 4 163.08 12.53 1.24 6.885 2007 1180
## 5 1 5 76.45 22.96 7.75 15.355 2007 1180
## 6 1 6 75.78 25.68 13.15 19.415 2007 1180
This figure compares the total annual precipitation for the generated monthly timeseries and the annual arma model simulation. Note that the differences between the two reflect the different annual precipitation in the sampled year for the monthly time series and the simulated year from the ARMA model.
wgen_mon$sim.mon[[1]] %>%
group_by(YEAR_SIM) %>%
summarise(PRCP=sum(PRCP),
YEAR_OBS=mean(YEAR_OBS),
PRCP_OBS=mean(PRCP_OBS_YEAR)) %>%
gather(VAR, VALUE, PRCP, PRCP_OBS) %>%
ggplot(aes(YEAR_SIM, VALUE, color=VAR)) +
geom_line() +
labs(x="Year", y="Annual Precipitation (mm)")
This figure shows the first simulation trial of the monthly climate timeseries for the selected location.
wgen_mon$sim.mon[[1]] %>%
select(YEAR_SIM, MONTH, PRCP:TAVG) %>%
gather(VAR, VALUE, PRCP:TAVG) %>%
mutate(DATE=ymd(paste(YEAR_SIM, MONTH, 1, sep='-'))) %>%
ggplot(aes(DATE, VALUE)) +
geom_line() +
facet_wrap(~VAR, scales='free_y', ncol=1) +
labs(x="Month/Year", y="")
After generating the monthly climate timeseries, we can impose linear trends on the timeseries to perform a climate stress test.
The linear trends are incorporated using an additive temperature factor and a multiplicative precipitation factor. For this example, we’ll consider an 150% increase in precipitation over the simulation period, and a 2 degC increase in temperature. Note that the same temperature factor is applied to all three temperature variables (TMIN, TMAX, TAVG). We’ll impose these trends on only one of the simulated monthly timeseries (chosen at random).
trend.factors <- list(PRCP=1.5, # multiplicative trend factor
TEMP=2) # additive trend factor
# select one trial at random
sim.gen <- wgen_mon$sim.mon[[sample(length(wgen_mon$sim.mon), 1)]]
# create linear trend lines
trend.lines <- list(PRCP=seq(1, trend.factors[['PRCP']], length.out=nrow(sim.gen)),
TEMP=seq(0, trend.factors[['TEMP']], length.out=nrow(sim.gen)))
sim.trend <- mutate(sim.gen,
PRCP.TREND=PRCP * trend.lines[['PRCP']],
TMAX.TREND=TMAX + trend.lines[['TEMP']],
TMIN.TREND=TMIN + trend.lines[['TEMP']],
TAVG.TREND=TAVG + trend.lines[['TEMP']])
This figure compares the original generate timeseries to the adjusted timeseries for the given trend factors.
sim.trend %>%
mutate(PRCP.GEN=PRCP, TMAX.GEN=TMAX, TMIN.GEN=TMIN, TAVG.GEN=TAVG) %>%
gather(VAR.GROUP, VALUE, PRCP.GEN:TAVG.GEN, PRCP.TREND:TAVG.TREND) %>%
separate(VAR.GROUP, c("VAR", "GROUP")) %>%
mutate(DATE=ymd(paste(YEAR_SIM, MONTH, 1, sep='-'))) %>%
ggplot(aes(DATE, VALUE, color=GROUP)) +
geom_line() +
facet_wrap(~VAR, ncol=1, scales='free_y') +
labs(x="Month/Year", y="") +
scale_color_manual('', values=c(GEN='grey20', TREND='chartreuse3'),
labels=c(GEN='w/o trend', TREND='w/ trend'))
The weatherGen::trends_mon() function applies a range of trend factors to the set of monthly timeseries generated by weatherGen::gen_month_arma(). For each pair of temperature and precipitation factors, the function will create a single monthly timeseries by sampling a random iteration from the gen_month_arma() output.
trend.factors <- list(PRCP=seq(0.75, 1.25, by=0.05),
TEMP=seq(0, 5, by=0.5))
sim.trends <- trends_mon(sim.mon.list=wgen_mon$sim.mon,
temp.factors=trend.factors[['TEMP']],
prcp.factors=trend.factors[['PRCP']])
str(sim.trends[[1]])
## List of 3
## $ trend.factors:List of 2
## ..$ PRCP: num 0.75
## ..$ TEMP: num 0
## $ df :'data.frame': 744 obs. of 12 variables:
## ..$ YEAR_SIM : num [1:744] 1 1 1 1 1 1 1 1 1 1 ...
## ..$ MONTH : num [1:744] 1 2 3 4 5 6 7 8 9 10 ...
## ..$ PRCP : num [1:744] 104.7 62 117.1 88.3 100.9 ...
## ..$ TMAX : num [1:744] 2.75 2.38 7.97 16.68 19.83 ...
## ..$ TMIN : num [1:744] -6.71 -8.33 -2.81 3.99 7.9 ...
## ..$ TAVG : num [1:744] -1.98 -2.98 2.58 10.34 13.86 ...
## ..$ YEAR_OBS : num [1:744] 1974 1974 1974 1974 1974 ...
## ..$ PRCP_OBS_YEAR: num [1:744] 1156 1156 1156 1156 1156 ...
## ..$ PRCP.TREND : num [1:744] 104.7 62 117.1 88.3 100.7 ...
## ..$ TMAX.TREND : num [1:744] 2.75 2.38 7.97 16.68 19.83 ...
## ..$ TMIN.TREND : num [1:744] -6.71 -8.33 -2.81 3.99 7.9 ...
## ..$ TAVG.TREND : num [1:744] -1.98 -2.98 2.58 10.34 13.86 ...
## $ trial : int 16
Now we can merge these trends and add columns containing the trend factors for temperature (TEMP_FACTOR) and precipitaiton (PRCP_FACTOR).
df.trends <- lapply(sim.trends, function(x) {
df <- x$df
df$PRCP_FACTOR <- x$trend.factors[['PRCP']]
df$TEMP_FACTOR <- x$trend.factors[['TEMP']]
return(df)
}) %>%
do.call(rbind, .)
summary(df.trends)
## YEAR_SIM MONTH PRCP TMAX
## Min. : 1.0 Min. : 1.00 Min. : 5.7 Min. :-3.74
## 1st Qu.:16.0 1st Qu.: 3.75 1st Qu.: 61.4 1st Qu.: 6.12
## Median :31.5 Median : 6.50 Median : 87.1 Median :16.32
## Mean :31.5 Mean : 6.50 Mean : 94.0 Mean :15.50
## 3rd Qu.:47.0 3rd Qu.: 9.25 3rd Qu.:117.3 3rd Qu.:24.68
## Max. :62.0 Max. :12.00 Max. :516.1 Max. :31.62
## TMIN TAVG YEAR_OBS PRCP_OBS_YEAR
## Min. :-15.85 Min. :-9.27 Min. :1949 Min. : 578
## 1st Qu.: -3.99 1st Qu.: 0.95 1st Qu.:1964 1st Qu.:1058
## Median : 3.45 Median : 9.92 Median :1978 Median :1178
## Mean : 3.62 Mean : 9.56 Mean :1979 Mean :1181
## 3rd Qu.: 11.68 3rd Qu.:18.36 3rd Qu.:1996 3rd Qu.:1298
## Max. : 18.49 Max. :25.05 Max. :2010 Max. :1832
## PRCP.TREND TMAX.TREND TMIN.TREND TAVG.TREND
## Min. : 4.5 Min. :-3.74 Min. :-15.85 Min. :-9.27
## 1st Qu.: 60.2 1st Qu.: 7.48 1st Qu.: -2.84 1st Qu.: 2.33
## Median : 85.6 Median :17.55 Median : 4.67 Median :11.07
## Mean : 94.0 Mean :16.75 Mean : 4.87 Mean :10.81
## 3rd Qu.:117.6 3rd Qu.:26.03 3rd Qu.: 13.13 3rd Qu.:19.58
## Max. :631.9 Max. :36.02 Max. : 23.24 Max. :29.46
## PRCP_FACTOR TEMP_FACTOR
## Min. :0.75 Min. :0.0
## 1st Qu.:0.85 1st Qu.:1.0
## Median :1.00 Median :2.5
## Mean :1.00 Mean :2.5
## 3rd Qu.:1.15 3rd Qu.:4.0
## Max. :1.25 Max. :5.0
This figure shows the monthly precipitation timeseries colored by trend magnitude. This figure only shows precipitation timeseries without any temperature trend. You can see some increase with the ‘redder’ lines that have higher trend factors.
df.trends %>%
filter(TEMP_FACTOR==0) %>%
group_by(PRCP_FACTOR) %>%
mutate(TIMESTEP=row_number()) %>%
ggplot(aes(TIMESTEP, PRCP.TREND, color=PRCP_FACTOR)) +
geom_line(alpha=0.2) +
scale_color_gradient('Precip Factor', low='green', high='red') +
labs(x="Simulation Month", y="Monthly Precipitation (mm)")
This figure shows the generated precipitation timeseries over the complete range of precipitation trend factors including only timeseries with no temperature trend (TEMP_FACTOR==0). This figure clearly shows the increasing trends with higher precipitation trend factors.
df.trends %>%
filter(TEMP_FACTOR==0) %>%
group_by(TEMP_FACTOR, PRCP_FACTOR, YEAR_SIM) %>%
summarise(PRCP=sum(PRCP.TREND)) %>%
ggplot(aes(YEAR_SIM, PRCP, color=PRCP_FACTOR, group=PRCP_FACTOR)) +
geom_line(alpha=0.7) +
scale_color_gradient('Precip Factor', low='green', high='red') +
labs(x="Simulation Year", y="Annual Precipitation (mm)")
This figure shows the generated average temperature timeseries over the range of trend factors including only timeseries with no precipitation trend (PRCP_FACTOR==0). This figure clearly shows the increasing trends with higher temperature trend factors.
df.trends %>%
filter(PRCP_FACTOR==1) %>%
mutate(DATE=ymd(paste(2001, MONTH, 1, sep='-'))) %>%
group_by(TEMP_FACTOR, PRCP_FACTOR, YEAR_SIM) %>%
summarise(TAVG=sum(TAVG.TREND*days_in_month(DATE))/sum(days_in_month(DATE))) %>%
ggplot(aes(YEAR_SIM, TAVG, color=TEMP_FACTOR, group=TEMP_FACTOR)) +
geom_line(alpha=0.7) +
scale_color_gradient('Precip Factor', low='green', high='red') +
labs(x="Simulation Year", y="Annual Mean Temperature (degC)")
sessionInfo()
## R version 3.1.1 (2014-07-10)
## Platform: x86_64-apple-darwin13.1.0 (64-bit)
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] weatherGen_0.1 biwavelet_0.17.3 mapproj_1.2-2
## [4] maps_2.3-7 ggmap_2.3 RSQLite.extfuns_0.0.1
## [7] RSQLite_0.11.4 DBI_0.2-7 stringr_0.6.2
## [10] ggplot2_1.0.0 lubridate_1.3.3 tidyr_0.1
## [13] dplyr_0.2 forecast_5.4 timeDate_3010.98
## [16] zoo_1.7-11 devtools_1.5
##
## loaded via a namespace (and not attached):
## [1] assertthat_0.1 car_2.0-20 cluster_1.15.2
## [4] colorspace_1.2-4 digest_0.6.4 evaluate_0.5.5
## [7] fields_7.1 formatR_0.10 Formula_1.1-1
## [10] fracdiff_1.4-2 grid_3.1.1 gtable_0.1.2
## [13] Hmisc_3.14-4 htmltools_0.2.4 httr_0.3
## [16] hydromad_0.9-20 knitr_1.6 labeling_0.2
## [19] lattice_0.20-29 latticeExtra_0.6-26 magrittr_1.0.1
## [22] MASS_7.3-33 memoise_0.2.1 munsell_0.4.2
## [25] nnet_7.3-8 parallel_3.1.1 plyr_1.8.1
## [28] png_0.1-7 proto_0.3-10 quadprog_1.5-5
## [31] RColorBrewer_1.0-5 Rcpp_0.11.2 RCurl_1.95-4.1
## [34] reshape2_1.4 RgoogleMaps_1.2.0.6 rjson_0.2.14
## [37] RJSONIO_1.2-0.2 rmarkdown_0.2.50 scales_0.2.4
## [40] spam_0.41-0 splines_3.1.1 survival_2.37-7
## [43] tools_3.1.1 tseries_0.10-32 whisker_0.3-2
## [46] yaml_2.1.13